ampliconFastaFile <- params$ampliconFastaFile
ampliconName <- params$ampliconName
sampleSheetFile <- params$sampleSheetFile
indelsFolder <- params$indelsFolder
cutSitesFile <- params$cutSitesFile
prefix <- params$prefix
workdir <- params$workdir
inputParameterDesc <- c('Amplicon Fasta File',
'Amplicon Name',
'sgRNA Cut Sites File',
'Experiment Data File',
'indels folder path',
'Prefix for output files',
'Working directory'
)
inputParameterValues <- c(ampliconFastaFile,
ampliconName,
cutSitesFile,
sampleSheetFile,
indelsFolder,
prefix,
workdir)
inputSettings <- data.frame(parameters = inputParameterDesc,
values = inputParameterValues,
stringsAsFactors = FALSE)
DT::datatable(data = inputSettings,
extensions = 'FixedColumns',
options = list(fixedColumns = TRUE,
scrollX = TRUE,
pageLength = nrow(inputSettings),
dom = 't'))
sampleSheet <- read.csv(sampleSheetFile, stringsAsFactors = F)
sampleSheet <- sampleSheet[sampleSheet$amplicon == ampliconName,]
DT::datatable(data = sampleSheet,
extensions = 'FixedColumns',
options = list(fixedColumns = TRUE,
scrollX = TRUE,
dom = 't'))
cutSites <- read.table(cutSitesFile, stringsAsFactors = F)
colnames(cutSites) <- c('sgRNA', 'cutSite')
DT::datatable(data = cutSites)
# import stats tables
coverageStats <- as.data.table(do.call(rbind, lapply(sampleSheet$sample_name, function(sampleName) {
f <- file.path(indelsFolder, paste0(sampleName, '.coverageStats.tsv'))
if(file.exists(f)) {
dt <- data.table::fread(f)
return(dt)
}
})))
#create a matrix of bp versus samples where values are coverage
dt <- dcast.data.table(coverageStats, sample ~ bp, value.var = 'cov', fill = 0)
M <- as.matrix(dt[,-1])
rownames(M) <- dt$sample
pheatmap::pheatmap(M,
cluster_rows = nrow(M) > 1,
cluster_cols = F,
show_colnames = F,
main = paste(ampliconName, 'Coverage Profiles'))
p <- ggplot(coverageStats, aes(x = bp, y = cov, group = sample), height = 500) +
geom_line(aes(color = sample), show.legend = F) +
theme_bw() +
labs(x = 'base position', y = 'Number of Reads', title = paste(ampliconName, 'Coverage Profiles'))
ggplotly(p)
plotScores <- function(indelsFolder, sampleName, sampleSheet, cutSites) {
f <- file.path(indelsFolder, paste0(sampleName, '.coverageStats.tsv'))
dt <- data.table::fread(f)
sgRNAs <- unlist(strsplit(x = sampleSheet[sampleSheet$sample_name == sampleName,]$sgRNA_ids,
split = ':'))
sgRNAs <- merge(data.frame('sgRNA' = sgRNAs, 'sample_name' = sampleName), cutSites, by = 'sgRNA')
mdt <- melt(dt[,-c('ins', 'del', 'indel', 'cov')], id.vars = c('bp', 'sample', 'seqname'))
p <- ggplot(mdt, aes(x = bp, y = value, group = variable)) +
geom_line(aes(color = variable)) + labs(title = sampleName)
if(nrow(sgRNAs) > 0) {
p <- p + geom_vline(data = sgRNAs, aes(xintercept = cutSite,
color = sgRNA), linetype = 'dotted')
}
p <- p +
theme_bw(base_size = 14) +
scale_y_continuous(labels = scales::percent)
return(p)
}
plots <- lapply(sampleSheet$sample_name, function(sampleName) {
plotScores(indelsFolder = indelsFolder,
sampleName = sampleName,
sampleSheet = sampleSheet,
cutSites = cutSites)
})
names(plots) <- sampleSheet$sample_name
if(length(plots) > 10) {
cat("## Indel Score Profiles {.tabset .tabset-dropdown}\n\n")
} else {
cat("## Indel Score Profiles {.tabset}\n\n")
}
out = NULL
for (n in names(plots)) {
p <- ggplotly(plots[[n]])
out = c(out, knitr::knit_expand(text='### {{n}} \n\n {{p}} \n\n'))
}
# this is a table for all samples versus all cut sites in the amplicon
# not all sample x sgRNA combinations are true,
# but still they are also computed to serve as a negative control
cutSiteStats <- as.data.table(do.call(rbind, lapply(sampleSheet$sample_name, function(sampleName) {
f <- file.path(indelsFolder, paste0(sampleName, '.indel_stats_at_cutsites.tsv'))
if(file.exists(f)) {
dt <- fread(f)
return(dt)
}
})))
# match samples to the actual sgRNA guides used in that sample
sampleGuides <- lapply(sampleSheet$sample_name, function(s) {
sgRNAs <- unlist(strsplit(x = sampleSheet[sampleSheet$sample_name == s,]$sgRNA_ids,
split = ':'))
})
names(sampleGuides) <- as.character(sampleSheet$sample_name)
cutSiteStats$sampleMatchesGuide <- as.factor(apply(cutSiteStats, 1, function(x) {
s <- as.character(x[['sample']])
g <- as.character(x[['sgRNA']])
return(g %in% sampleGuides[[s]])
}))
plots <- lapply(as.vector(unique(cutSiteStats$sgRNA)), function(sg) {
dt <- cutSiteStats[sgRNA == sg & sampleMatchesGuide == TRUE]
if(nrow(dt) > 0) {
p <- ggplot(dt, aes(x = sample, y = indelEfficiency)) +
geom_bar(stat = 'identity', aes(fill = sample)) +
labs(title = sg) + coord_flip()
return(p)
}
})
names(plots) <- as.vector(unique(cutSiteStats$sgRNA))
if(length(plots) > 10) {
cat("## Indel Stats At Cut Sites {.tabset .tabset-dropdown}\n\n")
} else {
cat("## Indel Stats At Cut Sites {.tabset}\n\n")
}
out = NULL
for (n in names(plots)) {
if(!is.null(plots[[n]])) {
p <- ggplotly(plots[[n]])
out = c(out, knitr::knit_expand(text='### {{n}} \n\n {{p}} \n\n'))
} else {
out = c(out, knitr::knit_expand(text='### {{n}} \n\n No indel data found\n\n'))
}
}
No indel data found
No indel data found
No indel data found
No indel data found
No indel data found
No indel data found
No indel data found
No indel data found
No indel data found
No indel data found
No indel data found
No indel data found
No indel data found
No indel data found
dts <- crosstalk::SharedData$new(cutSiteStats)
bscols(widths = c(4,8),
list(
filter_checkbox("sampleMatchesGuide", "Own Guide", dts, ~factor(sampleMatchesGuide), inline = TRUE),
filter_select( "sgRNA", "Select sgRNA", dts, ~factor(sgRNA), allLevels = FALSE, multiple = FALSE),
filter_select( "sample", "Select samples", dts, ~factor(sample), allLevels = FALSE, multiple = TRUE)
),
plotly::plot_ly(data = dts,
x = ~ sgRNA,
y = ~ indelEfficiency,
group = ~sgRNA,
color = ~sample,
type = 'bar',
text = ~sample,
height = 500) %>%
layout(showlegend = FALSE, xaxis = list(showticklabels = TRUE), barmode = 'group')
)
# indels: a data.table object with minimal columns: start, end,
# cutSites: a data.frame/data.table object with minimal columns: sgRNA and cutsite
# return: data.frame (nrow = nrow(indels), columns are sgRNA ids,
# values are 1 if indel overlaps cutsite, otherwise 0.
overlapCutSites <- function(indels, cutSites, extend = 3) {
target <- IRanges(start = cutSites$cutSite - extend,
end = cutSites$cutSite + extend,
names = cutSites$sgRNA)
query <- IRanges(start = indels$start, end = indels$end)
startOverlaps <- as.data.table(findOverlaps(start(query), target, type = 'any'))
endOverlaps <- as.data.table(findOverlaps(end(query), target, type = 'any'))
overlaps <- merge(startOverlaps, endOverlaps, by = c('queryHits', 'subjectHits'), all = TRUE)
M <- matrix(data = rep(0, nrow(indels) * length(target)),
nrow = nrow(indels), ncol = length(target))
colnames(M) <- names(target)
M[as.matrix(overlaps)] <- 1
return(M)
}
#import all detected indels (unfiltered) from all samples
allIndels <- as.data.table(do.call(rbind, lapply(sampleSheet$sample_name, function(sampleName) {
f <- file.path(indelsFolder, paste0(sampleName, '.indels.unfiltered.tsv'))
if(file.exists(f)) {
dt <- fread(f)
return(dt)
}
})))
#summarize indels by read support
indelSummary <- allIndels[,length(unique(readID)), by = c('seqname', 'sample', 'start', 'end', 'indelType')]
colnames(indelSummary)[6] <- 'ReadSupport'
dt <- cbind(indelSummary,
overlapCutSites(indels = indelSummary,
cutSites = cutSites, extend = 3))
mdt <- melt(dt, id.vars = colnames(indelSummary))
#make plots for different score thresholds, facet by samples, guides, and indel types
supportThresholds <- c(0, 1, 5, 10)
plots <- lapply(supportThresholds, function(s) {
df <- mdt[ReadSupport > s & value == 1,
length(seqname),
by = c('sample', 'variable', 'indelType')]
if(nrow(df) > 0) {
p <- ggplot(data = df, aes(x = sample, y = variable)) +
#geom_point(aes(size = V1)) +
geom_tile(aes(fill = V1))
if(nrow(df) < 250) {
p <- p +
geom_text(aes(label = V1), color = 'white') + facet_wrap(~ indelType, ncol = 1)
}
p <- p + theme_bw() +
theme(axis.text.x = element_text(angle = 90), legend.position = 'none')
return(p)
} else {
NULL
}
})
names(plots) <- paste("Read Support >",supportThresholds)
for (i in 1:length(plots)) {
cat("## ",names(plots)[i],"\n\n")
p <- plots[[i]]
if(!is.null(p)) {
print(plots[[i]])
} else {
cat("No genotypes detected")
}
cat("\n\n")
}
No genotypes detected
No genotypes detected
No genotypes detected
print(sessionInfo())
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-conda_cos6-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.4 LTS
##
## Matrix products: default
## BLAS/LAPACK: /home/edanner/anaconda3/lib/R/lib/libRblas.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=de_DE.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=de_DE.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=de_DE.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 methods stats graphics grDevices utils
## [8] datasets base
##
## other attached packages:
## [1] bindrcpp_0.2.2 crosstalk_1.0.0 plotly_4.7.1
## [4] DT_0.4 rtracklayer_1.38.3 GenomicRanges_1.30.3
## [7] GenomeInfoDb_1.14.0 IRanges_2.12.0 S4Vectors_0.16.0
## [10] BiocGenerics_0.24.0 data.table_1.11.4 ggrepel_0.8.0
## [13] ggplot2_3.0.0 knitr_1.20
##
## loaded via a namespace (and not attached):
## [1] Biobase_2.38.0 httr_1.3.1
## [3] tidyr_0.8.1 jsonlite_1.5
## [5] viridisLite_0.3.0 shiny_1.1.0
## [7] assertthat_0.2.0 GenomeInfoDbData_1.0.0
## [9] Rsamtools_1.30.0 yaml_2.1.19
## [11] pillar_1.2.3 backports_1.1.2
## [13] lattice_0.20-35 glue_1.2.0
## [15] digest_0.6.15 RColorBrewer_1.1-2
## [17] promises_1.0.1 XVector_0.18.0
## [19] colorspace_1.3-2 htmltools_0.3.6
## [21] httpuv_1.4.4.2 Matrix_1.2-14
## [23] plyr_1.8.4 XML_3.98-1.11
## [25] pkgconfig_2.0.1 pheatmap_1.0.10
## [27] zlibbioc_1.24.0 purrr_0.2.5
## [29] xtable_1.8-2 scales_0.5.0
## [31] later_0.7.3 BiocParallel_1.12.0
## [33] tibble_1.4.2 withr_2.1.2
## [35] SummarizedExperiment_1.8.1 lazyeval_0.2.1
## [37] magrittr_1.5 mime_0.5
## [39] evaluate_0.10.1 tools_3.4.3
## [41] matrixStats_0.53.1 stringr_1.3.1
## [43] munsell_0.5.0 DelayedArray_0.4.1
## [45] Biostrings_2.46.0 compiler_3.4.3
## [47] rlang_0.2.1 grid_3.4.3
## [49] RCurl_1.95-4.10 htmlwidgets_1.2
## [51] bitops_1.0-6 labeling_0.3
## [53] rmarkdown_1.10 gtable_0.2.0
## [55] R6_2.2.2 GenomicAlignments_1.14.2
## [57] dplyr_0.7.6 bindr_0.1.1
## [59] rprojroot_1.3-2 stringi_1.2.3
## [61] Rcpp_0.12.17 tidyselect_0.2.4